<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml">
  <head>

    <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
    <meta content="Cask Data, Inc." name="author" />
<meta content="Copyright © 2018 Cask Data, Inc." name="copyright" />


    <meta name="git_release" content="6.1.1">
    <meta name="git_hash" content="05fbac36f9f7aadeb44f5728cea35136dbc243e5">
    <meta name="git_timestamp" content="2020-02-09 08:22:47 +0800">
    <title>Feature Generation</title>

    <!-- Stylesheets, in cascade order. cdap-bootstrap.css is linked once, after the
         Bootstrap and plugin sheets, so its rules keep overriding theirs. An earlier
         duplicate link to the same file was dropped: every rule in it was re-applied
         by this later copy, so it only added a redundant request. -->
    <link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
    <link rel="stylesheet" href="../_static/bootstrap-3.3.6/css/bootstrap.min.css" type="text/css" />
    <link rel="stylesheet" href="../_static/bootstrap-3.3.6/css/bootstrap-theme.min.css" type="text/css" />
    <link rel="stylesheet" href="../_static/css/bootstrap-sphinx.css" type="text/css" />
    <link rel="stylesheet" href="../_static/css/cdap-dynamicscrollspy-4.css" type="text/css" />
    <link rel="stylesheet" href="../_static/css/jquery.mCustomScrollbar.css" type="text/css" />
    <link rel="stylesheet" href="../_static/css/cdap-jquery.mCustomScrollbar.css" type="text/css" />
    <link rel="stylesheet" href="../_static/css/abixTreeList-2.css" type="text/css" />
    <link rel="stylesheet" href="../_static/cdap-bootstrap.css" type="text/css" />
    <link rel="stylesheet" href="../_static/css/cdap-hide-toc.css" type="text/css" />

    <script type="text/javascript">
      // Page-level settings object, declared as a global before the documentation
      // scripts are loaded; presumably read by doctools.js and friends (confirm).
      // NOTE(review): URL_ROOT is empty although this page reaches its assets
      // through "../" paths; confirm that is intended before relying on it.
      var DOCUMENTATION_OPTIONS = {
        URL_ROOT: "",
        VERSION: "6.1.1",
        COLLAPSE_INDEX: false,
        FILE_SUFFIX: ".html",
        HAS_SOURCE: false
      };
    </script>
    <script type="text/javascript" src="../_static/jquery.js"></script>
    <script type="text/javascript" src="../_static/underscore.js"></script>
    <script type="text/javascript" src="../_static/doctools.js"></script>
    <script type="text/javascript" src="../_static/language_data.js"></script>

    <link rel="shortcut icon" href="../_static/favicon.ico"/>
    <link rel="index" title="Index" href="../genindex.html" />
    <link rel="search" title="Search" href="../search.html" />
    <link rel="top" title="Cask Data Application Platform 6.1.1 Documentation" href="../index.html" />
    <link rel="up" title="Analytics" href="index.html" />
    <link rel="next" title="Modeling" href="modeling.html" />
    <link rel="prev" title="Concepts and Terminology" href="concepts.html" />
    <!-- block extrahead -->
    <!-- The character encoding is already declared by the Content-Type meta at the
         top of the head, so no second charset declaration is made here. -->
    <meta http-equiv="X-UA-Compatible" content="IE=edge">
    <!-- No maximum-scale: capping the zoom level stops users from enlarging the page. -->
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <meta name="apple-mobile-web-app-capable" content="yes">
    <!-- block extrahead end -->

</head>
<body role="document">

<!-- block navbar -->
<div id="navbar" class="navbar navbar-inverse navbar-default navbar-fixed-top">
    <div class="container-fluid">
      <div class="row">
        <div class="navbar-header">
          <!-- .btn-navbar is used as the toggle for collapsed navbar content -->
          <a class="navbar-brand" href="../table-of-contents/../../index.html">
            <span><img alt="CDAP logo" src="../_static/cdap_logo.svg"/></span>
          </a>

          <!-- Icon-only toggle: aria-label gives it an accessible name for screen readers. -->
          <button type="button" class="navbar-toggle" data-toggle="collapse" data-target=".nav-collapse" aria-label="Toggle navigation">
            <span class="icon-bar"></span>
            <span class="icon-bar"></span>
            <span class="icon-bar"></span>
          </button>

          <div class="pull-right">
            <div class="dropdown version-dropdown">
              <a href="#" class="dropdown-toggle" data-toggle="dropdown"
                role="button" aria-haspopup="true" aria-expanded="false">
                v 6.1.1 <span class="caret"></span>
              </a>
              <ul class="dropdown-menu">
                <li><a href="//docs.cdap.io/cdap/5.1.2/en/index.html">v 5.1.2</a></li>
                <li><a href="//docs.cdap.io/cdap/4.3.4/en/index.html">v 4.3.4</a></li>
              </ul>
            </div>
          </div>
          <!-- Site search: submits the query (plus two fixed hidden fields) to the search page via GET. -->
          <form class="navbar-form navbar-right navbar-search" action="../search.html" method="get">
            <div class="form-group">
              <div class="navbar-search-image material-icons"></div>
              <!-- A placeholder is not a label; aria-label names the field for assistive technology. -->
              <input type="text" name="q" class="form-control" placeholder="  Search" aria-label="Search" />
            </div>
            <input type="hidden" name="check_keywords" value="yes" />
            <input type="hidden" name="area" value="default" />
          </form>

          <div class="collapse navbar-collapse nav-collapse navbar-right navbar-navigation">
            <ul class="nav navbar-nav"><li class="docsite-nav-tab-container"><a class="docsite-nav-tab-link " href="../table-of-contents/../../index.html">简介</a></li><li class="docsite-nav-tab-container"><a class="docsite-nav-tab-link current" href="../table-of-contents/../../guides.html">手册</a></li><li class="docsite-nav-tab-container"><a class="docsite-nav-tab-link " href="../table-of-contents/../../reference-manual/index.html">参考</a></li><li class="docsite-nav-tab-container"><a class="docsite-nav-tab-link " href="../table-of-contents/../../faqs/index.html">帮助</a></li>
            </ul>
          </div>

        </div>
      </div>
    </div>
  </div><!-- block navbar end -->
<!-- block main content -->
<div class="main-container container">
  <div class="row"><div class="col-md-2">
      <div id="sidebar" class="bs-sidenav scrollable-y-outside" role="complementary">
<!-- theme_manual: user-guide -->
<!-- theme_manual_highlight: guides -->
<!-- sidebar_title_link: ../table-of-contents/../../guides.html -->

  <div role="note" aria-label="manuals links"><h3><a href="../table-of-contents/../../guides.html">Guides</a></h3>

    <ul class="this-page-menu">
      <li class="toctree-l1"><b><a href="../table-of-contents/../../user-guide/index.html" rel="nofollow">用户手册</a></b>
      <nav class="pagenav">
      <ul class="current">
<li class="toctree-l1"><a class="reference internal" href="../index.html"> 简介</a></li>
<li class="toctree-l1"><a class="reference internal" href="../overview.html"> 概述</a></li>
<li class="toctree-l1"><a class="reference internal" href="../tutorials/index.html"> 入门指南</a><ul>
<li class="toctree-l2"><a class="reference internal" href="../tutorials/campaign.html">MySQL 客户数据</a></li>
<li class="toctree-l2"><a class="reference internal" href="../tutorials/nytimes-xml.html">纽约时报 XML 数据推送</a></li>
<li class="toctree-l2"><a class="reference internal" href="../tutorials/stocks.html">股票选择</a></li>
<li class="toctree-l2"><a class="reference internal" href="../tutorials/fitbit.html">物联网 IoT 设备数据</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../data-preparation/index.html"> 数据预处理</a><ul>
<li class="toctree-l2"><a class="reference internal" href="../data-preparation/concepts.html">      概念</a></li>
<li class="toctree-l2"><a class="reference internal" href="../data-preparation/directives/index.html">      数据处理指令</a><ul>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/catalog-lookup.html">catalog-lookup</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/change-column-case.html">change-column-case</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/changing-case.html">changing-case</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/cleanse-column-names.html">cleanse-column-names</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/columns-replace.html">columns-replace</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/copy.html">copy</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/cut-character.html">cut-character</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/decode.html">decode</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/diff-date.html">diff-date</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/drop.html">drop</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/encode.html">encode</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/extract-regex-groups.html">extract-regex-groups</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/fail.html">fail</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/fill-null-or-empty.html">fill-null-or-empty</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/filter-row-if-matched.html">filter-row-if-matched</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/filter-row-if-true.html">filter-row-if-true</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/filter-rows-on.html">filter-rows-on</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/find-and-replace.html">find-and-replace</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/flatten.html">flatten</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/format-date.html">format-date</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/format-unix-timestamp.html">format-unix-timestamp</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/generate-uuid.html">generate-uuid</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/hash.html">hash</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/increment-variable.html">increment-variable</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/index-split.html">index-split</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/invoke-http.html">invoke-http</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/json-path.html">json-path</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/keep.html">keep</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/mask-number.html">mask-number</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/mask-shuffle.html">mask-shuffle</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/merge.html">merge</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/parse-as-avro-file.html">parse-as-avro-file</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/parse-as-avro.html">parse-as-avro</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/parse-as-csv.html">parse-as-csv</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/parse-as-date.html">parse-as-date</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/parse-as-excel.html">parse-as-excel</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/parse-as-fixed-length.html">parse-as-fixed-length</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/parse-as-hl7.html">parse-as-hl7</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/parse-as-json.html">parse-as-json</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/parse-as-log.html">parse-as-log</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/parse-as-simple-date.html">parse-as-simple-date</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/parse-as-xml.html">parse-as-xml</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/parse-timestamp.html">parse-timestamp</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/parse-xml-to-json.html">parse-xml-to-json</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/quantize.html">quantize</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/rename.html">rename</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/send-to-error.html">send-to-error</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/set-charset.html">set-charset</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/set-column.html">set-column</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/set-columns.html">set-columns</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/set-record-delim.html">set-record-delim</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/set-type.html">set-type</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/set-variable.html">set-variable</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/split-by-separator.html">split-by-separator</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/split-email.html">split-email</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/split-to-columns.html">split-to-columns</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/split-to-rows.html">split-to-rows</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/split-url.html">split-url</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/stemming.html">stemming</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/swap.html">swap</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/table-lookup.html">table-lookup</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/text-distance.html">text-distance</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/text-metric.html">text-metric</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/trim.html">trim</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/url-decode.html">url-decode</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/url-encode.html">url-encode</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/write-as-csv.html">write-as-csv</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/write-as-json-map.html">write-as-json-map</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/write-as-json-object.html">write-as-json-object</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/xpath.html">xpath</a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="../data-preparation/functions/index.html">      函数</a><ul>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/functions/json-functions.html">JSON 函数</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/functions/type-functions.html">类型函数</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/functions/geofence-functions.html">地理围栏函数</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/functions/dq-functions.html">数据质量函数</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/functions/date-functions.html">日期函数</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/functions/ddl-functions.html">DDL 函数</a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="../data-preparation/service/index.html">      服务</a><ul>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/service/admin.html">行政和管理服务</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/service/connection-properties.html">连接属性</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/service/connections.html">连接服务</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/service/execution.html">数据处理指令执行</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/service/request.html">请求格式规范</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/service/schema-registry.html">Schema 注册库</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/service/services.html">数据预处理服务</a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="../data-preparation/performance.html">性能</a></li>
<li class="toctree-l2"><a class="reference internal" href="../data-preparation/exclusion-and-aliasing.html">排除与别名</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../pipelines/index.html"> 数据流管道</a><ul>
<li class="toctree-l2"><a class="reference internal" href="../pipelines/concepts-design.html"> 概念与设计</a></li>
<li class="toctree-l2"><a class="reference internal" href="../pipelines/getting-started.html"> 入门指南</a></li>
<li class="toctree-l2"><a class="reference internal" href="../pipelines/studio.html"> CDAP 数据流设计器</a></li>
<li class="toctree-l2"><a class="reference internal" href="../pipelines/creating-pipelines.html"> 创建数据流管道</a></li>
<li class="toctree-l2"><a class="reference internal" href="../pipelines/running-pipelines.html"> 运行数据流管道</a></li>
<li class="toctree-l2"><a class="reference internal" href="../pipelines/plugin-management.html"> 插件管理</a></li>
<li class="toctree-l2"><a class="reference internal" href="../pipelines/plugins/index.html"> 插件参考</a><ul>
<li class="toctree-l3"><a class="reference internal" href="../pipelines/plugins/actions/index.html"> Action Plugins</a><ul class="simple">
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="../pipelines/plugins/sources/index.html"> Source Plugins</a><ul class="simple">
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="../pipelines/plugins/transforms/index.html"> Transform Plugins</a><ul class="simple">
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="../pipelines/plugins/analytics/index.html"> Analytic Plugins</a><ul class="simple">
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="../pipelines/plugins/sinks/index.html"> Sink Plugins</a><ul class="simple">
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="../pipelines/plugins/shared-plugins/index.html"> Shared Plugins</a><ul>
<li class="toctree-l4"><a class="reference internal" href="../pipelines/plugins/shared-plugins/core.html">CoreValidator</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="../pipelines/plugins/post-run-plugins/index.html"> Post-run Plugins</a><ul class="simple">
</ul>
</li>
</ul>
</li>
</ul>
</li>
<li class="toctree-l1 current"><a class="reference internal" href="index.html"> 数据分析</a><ul class="current">
<li class="toctree-l2"><a class="reference internal" href="concepts.html"> Concepts</a></li>
<li class="toctree-l2 current"><a class="current reference internal" href="#"> Feature Generation</a></li>
<li class="toctree-l2"><a class="reference internal" href="modeling.html"> Modeling</a></li>
<li class="toctree-l2"><a class="reference internal" href="example.html"> Example</a></li>
</ul>
</li>
</ul>
</nav>
      </li>
      <li class="toctree-l1"><a href="../table-of-contents/../../developer-manual/index.html" rel="nofollow">开发手册</a>
      </li>
      <li class="toctree-l1"><a href="../table-of-contents/../../admin-manual/index.html" rel="nofollow">管理手册</a>
      </li>
      <li class="toctree-l1"><a href="../table-of-contents/../../integrations/index.html" rel="nofollow">集成手册</a>
      </li>
      <li class="toctree-l1"><a href="../table-of-contents/../../examples-manual/index.html" rel="nofollow">最佳实践</a>
      </li>
    </ul>
  </div></div>
    </div><div class="col-md-8 content" id="main-content">
    
  <div class="section" id="feature-generation">
<span id="user-guide-mmds-feature-gen"></span><h1>Feature Generation<a class="headerlink" href="#feature-generation" title="Permalink to this headline">🔗</a></h1>
<p>Before you can train a model, you must generate features by using the <a class="reference internal" href="../data-preparation/index.html#data-prep-user-guide-index"><span class="std std-ref">数据预处理 Application</span></a>.
The 数据预处理 application enables you to get interactive feedback while you clean and transform your data source.</p>
<p>Each row is a data point, and each column is a feature.
In the example below, we parsed our input, renamed columns, set various data types,
and filtered out real estate prices that are under one million dollars.
The outcome is price, and the features are city, zip, type, beds, baths, sqft, lot, stories, and year_built.</p>
<div class="figure align-center" style="width: 100%">
<a class="bordered-image reference internal image-reference" href="../_images/mmds-feature-gen.png"><img alt="Feature generation example: real estate data after parsing, renaming columns, and setting data types" class="bordered-image" src="../_images/mmds-feature-gen.png" style="width: 800px;" /></a>
</div>
<div class="section" id="feature-types">
<h2>Feature Types<a class="headerlink" href="#feature-types" title="Permalink to this headline">🔗</a></h2>
<p>When generating features, pay close attention to the feature type. Features can be numeric or categorical.</p>
<blockquote>
<div><ul class="simple">
<li>String and boolean columns are treated as <cite>categorical</cite> features.</li>
<li>Short, integer, long, float, and double columns are treated as <cite>numeric</cite> features.</li>
</ul>
</div></blockquote>
<p>One common mistake is setting a categorical type on a column that should be numeric.
Categories represent distinct values, and modeling algorithms do not expect a large number of distinct values.
In the example, setting the sqft column to a string instead of a double would result in thousands of distinct values
and cause a failure during model training.</p>
<p>Not all numbers need to be treated as a numeric feature. In the example, the zip is left as a string,
because each zip code represents a distinct value, not a number on a sliding scale.
Additionally, zip is unlike sqft where we expect a higher number to indicate a higher price.
The zip is more like city, where we expect certain values to correlate to certain price ranges.
Therefore, setting zip as an integer is incorrect.</p>
</div>
<div class="section" id="outliers">
<h2>Outliers<a class="headerlink" href="#outliers" title="Permalink to this headline">🔗</a></h2>
<p>Generally, you should analyze your features to identify outliers to filter out or clean.
For example, if gender is a feature, you might expect only <cite>male</cite> or <cite>female</cite> values.
However, your data might contain typos like <cite>femal</cite> or abbreviations like <cite>f</cite> instead of <cite>female</cite>.
You will want to clean your data so that all the values are standardized to <cite>female</cite>.</p>
<p>For numeric features, you should look for outliers that could indicate dirty data.
In the earlier real estate example, you might want to filter out a data point of 100000 square feet.
This type of analysis is difficult to perform when looking at separate data points.
Finding these types of errors is easier using statistics that are calculated over the entire data source.</p>
</div>
<div class="section" id="aggregate-statistics">
<h2>Aggregate statistics<a class="headerlink" href="#aggregate-statistics" title="Permalink to this headline">🔗</a></h2>
<p>After you split the data, you can view aggregate statistics for each feature.</p>
<p>For categorical features, you can view a histogram of the values.
The example below shows the house type, most of which are the types <cite>Single-Family Home</cite> or <cite>Condo</cite>.
Based on this histogram, you might want to filter out the <cite>Income/Investment</cite> data points and replace <cite>Townhouse</cite> with <cite>Condo</cite>.</p>
<div class="figure align-center" style="width: 100%">
<a class="bordered-image reference internal image-reference" href="../_images/mmds-data-split.png"><img alt="Aggregate statistics example: histogram of house type values" class="bordered-image" src="../_images/mmds-data-split.png" style="width: 800px;" /></a>
</div>
<p>To modify your features, click the <em>Edit</em> button near the top of the screen to go back to the
数据预处理 view where you can perform further cleaning and transformation.
Once you are happy with your features, click the <em>Done</em> button at the bottom of the screen to proceed with model training.</p>
</div>
</div>

</div>
    <div class="col-md-2">
      <div id="right-sidebar" class="bs-sidenav scrollable-y" role="complementary">
        <div id="localtoc-scrollspy">
        </div>
      </div>
    </div></div>
</div>
<!-- block main content end -->
<!-- block footer -->
<!-- Page footer: previous/next page links on the sides, copyright and download links in the centre. -->
<footer class="footer">
      <div class="container">
        <div class="row">
          <div class="col-md-2 footer-left"><a title="Concepts and Terminology" href="concepts.html">Previous</a></div>
          <div class="col-md-8 footer-center"><a class="footer-tab-link" href="../table-of-contents/../../reference-manual/licenses/index.html">Copyright</a> &copy; 2014-2020 Cask Data, Inc.&bull; <a class="footer-tab-link" href="//docs.cask.co/cdap/6.1.1/cdap-docs-6.1.1-web.zip" rel="nofollow">Download</a> an archive or
<a class="footer-tab-link" href="//docs.cask.co/cdap">switch the version</a> of the documentation
          </div>
          <div class="col-md-2 footer-right"><a title="Modeling" href="modeling.html">Next</a></div>
        </div>
      </div>
    </footer>
<!-- block footer end -->
<!-- Scripts run in document order; keep this order when editing the list. -->
<script type="text/javascript" src="../_static/bootstrap-3.3.6/js/bootstrap.min.js"></script>
<script type="text/javascript" src="../_static/js/bootstrap-sphinx.js"></script>
<script type="text/javascript" src="../_static/js/abixTreeList-2.js"></script>
<script type="text/javascript" src="../_static/js/cdap-dynamicscrollspy-4.js"></script>
<script type="text/javascript" src="../_static/js/cdap-version-menu.js"></script>
<script type="text/javascript" src="../_static/js/copy-to-clipboard.js"></script>
<script type="text/javascript" src="../_static/js/jquery.mousewheel.min.js"></script>
<script type="text/javascript" src="../_static/js/jquery.mCustomScrollbar.js"></script>
<script type="text/javascript" src="../_static/js/js.cookie.js"></script>
<script type="text/javascript" src="../_static/js/tabbed-parsed-literal-0.2.js"></script>
<script type="text/javascript" src="../_static/js/cdap-onload-javascript.js"></script>
<!-- NOTE(review): cdap-version-menu.js is also loaded earlier in this list, so it
     executes twice. Probably accidental; confirm before removing either copy. -->
<script type="text/javascript" src="../_static/js/cdap-version-menu.js"></script>
<script src="https://cdap.gitee.io/docs/cdap/json-versions.js"></script>
  </body>
</html>